# -*- coding: utf-8 -*-
# ChatGPT/人工混合标注

import json
import os
import random
from pathlib import Path

import openai

import config
# Configure the OpenAI client with the API key defined in the local config module.
openai.api_key = config.OPENAI_KEY

def gpt_label(sentence):
    """Ask the chat model to classify a sentence's stance on digital-technology risk.

    The sentence is interpolated verbatim into a Chinese prompt that asks the
    model to answer with a single integer.

    Args:
        sentence: The sentence to classify.

    Returns:
        -1 (risk exposure) or 1 (risk prevention) when the reply is exactly
        one of those integers (surrounding whitespace is tolerated);
        otherwise None, so the item can be resolved during manual review.

    Raises:
        Whatever ``openai.ChatCompletion.create`` raises (network errors,
        rate limits, ...); API failures are deliberately not swallowed here.
    """
    prompt = f"""判断以下句子对企业数字技术风险的态度：
句子：{sentence}
输出：仅返回一个整数 -1（风险暴露） 或 1（风险防范）。"""

    res = openai.ChatCompletion.create(
        model="gpt-3.5-turbo",
        messages=[{"role": "user", "content": prompt}],
        temperature=0
    )
    try:
        label = int(res["choices"][0]["message"]["content"])
    except (KeyError, IndexError, TypeError, ValueError):
        # Malformed response structure, missing content, or a non-integer
        # reply: leave the label empty for the human reviewer.
        return None
    # The prompt only allows -1 or 1; treat any other integer as unparseable
    # instead of letting an invalid label slip into the dataset.
    return label if label in (-1, 1) else None

# Label a random sample of up to 10 sentences from every file in data/filtered
# and collect the results into one list for manual review.
labeled = []
# sorted(): glob order depends on the filesystem; process files in a stable order.
for js in sorted(Path("data/filtered").glob("*.json")):
    # Use a context manager so each input file is closed right after reading.
    with open(js, encoding="utf-8") as src:
        sents = json.load(src)
    # random.sample raises if asked for more items than exist, hence the min().
    sample = random.sample(sents, min(len(sents), 10))
    for s in sample:
        # label is None when the model reply could not be parsed; such rows are
        # kept so the reviewer can fill them in by hand.
        label = gpt_label(s)
        labeled.append({"sentence": s, "label": label})

# Closing the output file explicitly guarantees the JSON is flushed to disk
# before the completion message is printed.
with open("data/labeled.json", "w", encoding="utf-8") as dst:
    json.dump(labeled, dst, ensure_ascii=False, indent=2)
print("✅ 初步标注完成，请人工复核 data/labeled.json")

